*** 
*** Code for "The Labor Market Integration of Refugee Migrants in High-Income Countries"
*** Courtney Brell, Christian Dustmann, and Ian Preston
***
*** Analysis of the EU Labour Force Survey 2008 & 2014 ad-hoc modules
*** This file should be run in the folder containing the EU LFS data
*** Note that for each of the ad-hoc modules we use, all the separate country data files have previously been combined into allcountries2008_y.dta and allcountries2014_y.dta respectively
***

********************************************************************************
*** Preliminaries

* Start from an empty session so that nothing left in memory by earlier work
* (data, value labels, matrices, stored results) can leak into this analysis
clear all

********************************************************************************
*** Prepare the 2014 data 

* Load the combined 2014 ad-hoc module
* (forward slashes are used because Stata accepts them on every platform)
use "AdhocModules/LFS_ahm_2014/allcountries2014_y", clear

* Survey details
rename country hostcountry
rename year yearofinterview
rename coeff weight

* Generate demographic variables
rename age age_cat
* female is 0 when sex==1, 1 when sex==2, and missing for any other code
recode sex (1=0) (2=1) (else=.), gen(female)
rename countryb countryoforigin

* Calculate the number of years since arrival
* yearesid is a string that holds either a single number of years or a band
gen yearssincearrive_cat=.
replace yearssincearrive_cat=0 if inlist(yearesid,"00")
replace yearssincearrive_cat=1 if inlist(yearesid,"01","02","03","04","01-04")
replace yearssincearrive_cat=2 if inlist(yearesid,"05","06","07","08","09","10","05-10")
replace yearssincearrive_cat=3 if inlist(yearesid,"11-14")
replace yearssincearrive_cat=4 if inlist(yearesid,"15-19")
* Every remaining non-empty code is assigned to the open-ended top category
replace yearssincearrive_cat=5 if missing(yearssincearrive_cat)&!missing(yearesid)
label define Lysm 0 "N/A" 1 "1-4 years" 2 "5-10 years" 3 "11-14 years" 4 "15-19 years" 5 ">19 years"
label values yearssincearrive_cat Lysm

* Identify migrant types
* refugee: reason for migration is code 5 (labelled "Asylum" below)
* native: yearesid is "00" and the country of birth is the host country
* immigrant: everyone else with a non-empty, non-"00" yearesid born abroad
gen refugee=1 if ahm2014_migreas==5
gen native=1 if yearesid=="00"&countryoforigin=="000-OWN COUNTRY"
gen immigrant=1 if refugee!=1&yearesid!="00"&!missing(yearesid)&countryoforigin!="000-OWN COUNTRY"
* Order matters: the refugee assignment runs after the native one, so a record
* flagged as both ends up classified as a refugee
gen migranttype=0 if native==1
replace migranttype=1 if refugee==1
replace migranttype=2 if immigrant==1
label define Lmigrant 0 "Native" 1 "Refugee" 2 "Other immigrant"
label values migranttype Lmigrant

* Labor market outcomes
* employed is left missing when ilostat is missing. A bare (ilostat==1) would
* evaluate to 0 for those records, silently counting them as not employed and
* making the later missing(employed) filters ineffective.
gen employed=(ilostat==1) if !missing(ilostat)
* Code 99 of incdecil is treated as "no information"
gen incomedecile=incdecil if incdecil!=99

* Language outcomes
* Code 9 of ahm2014_langhost is treated as "no information"
gen language=ahm2014_langhost if ahm2014_langhost!=9
label define Llang 1 "Mother tongue" 2 "Advanced" 3 "Intermediate" 4 "Beginner or less"
label values language Llang
* Two indicators derived from language; both stay missing when language is missing
recode language (1 2=1) (3 4=0), gen(lang_atleastadv)
recode language (1 2 3=0) (4=1), gen(lang_begatbest)

* Education
* Map the string codes L/M/H of hatlev1d to 1/2/3; anything else stays missing
gen educ=.
replace educ=1 if hatlev1d=="L"
replace educ=2 if hatlev1d=="M"
replace educ=3 if hatlev1d=="H"
label define Leduc 1 "Low" 2 "Med" 3 "High"
label values educ Leduc

* Currently in education
* Primary source is mainstat (code 3 => student); where mainstat is missing,
* fall back on educstat (2 => not a student, 1 or 3 => student)
recode mainstat (3=1)(.=.)(else=0), gen(student)
replace student=0 if missing(mainstat)&educstat==2
replace student=1 if missing(mainstat)&(educstat==1|educstat==3)

* Sample of interest
* The upper bound also removes records with missing age_cat (missing sorts above 62)
keep if age_cat>=22&age_cat<=62
drop if missing(migranttype)
* Drops students and records whose student status could not be determined
keep if student==0

* Reformat and save for comparison with 2008
rename ahm2014_migreas migreas
* NOTE(review): the 2008 section defines a value label with this same name but a
* different coding. When the two files are appended, the definition already in
* memory wins, so the text labels shown for 2014 migreas values can be wrong in
* the pooled data - confirm before tabulating migreas on the pooled sample.
label define Lmigreas 1 "Employment, prior" 2 "Employment, not prior" 3 "Family" 4 "Study" 5 "Asylum" 6 "Other" 9 "N/A"
label values migreas Lmigreas
keep hostcountry yearofinterview weight age_cat female yearssincearrive_cat countryoforigin migranttype migreas employed incomedecile language lang_atleastadv lang_begatbest educ student
save "temp2014", replace

********************************************************************************
*** Prepare the 2008 data 

* Load the combined 2008 ad-hoc module
* (forward slashes are used because Stata accepts them on every platform)
use "AdhocModules/LFS_ahm_2008/allcountries2008_y", clear

* Survey details
rename country hostcountry
rename year yearofinterview
rename coeff weight

* Generate demographic variables
rename age age_cat
* female is 0 when sex==1, 1 when sex==2, and missing for any other code
recode sex (1=0) (2=1) (else=.), gen(female)
rename countryb countryoforigin

* Calculate the number of years since arrival
* yearesid is a string that holds either a single number of years or a band
gen yearssincearrive_cat=.
replace yearssincearrive_cat=0 if inlist(yearesid,"00")
replace yearssincearrive_cat=1 if inlist(yearesid,"01","02","03","04","01-04")
replace yearssincearrive_cat=2 if inlist(yearesid,"05","06","07","08","09","10","05-10")
replace yearssincearrive_cat=3 if inlist(yearesid,"11-14")
replace yearssincearrive_cat=4 if inlist(yearesid,"15-19")
* Every remaining non-empty code is assigned to the open-ended top category
replace yearssincearrive_cat=5 if missing(yearssincearrive_cat)&!missing(yearesid)
label define Lysm 0 "N/A" 1 "1-4 years" 2 "5-10 years" 3 "11-14 years" 4 "15-19 years" 5 ">19 years"
label values yearssincearrive_cat Lysm

* Identify migrant types
* refugee: reason for migration is code 5 (labelled "Asylum" below)
* native: yearesid is "00" and the country of birth is the host country
* immigrant: everyone else with a non-empty, non-"00" yearesid born abroad
gen refugee=1 if ahm2008_migreas==5
gen native=1 if yearesid=="00"&countryoforigin=="000-OWN COUNTRY"
gen immigrant=1 if refugee!=1&yearesid!="00"&!missing(yearesid)&countryoforigin!="000-OWN COUNTRY"
* Order matters: the refugee assignment runs after the native one, so a record
* flagged as both ends up classified as a refugee
gen migranttype=0 if native==1
replace migranttype=1 if refugee==1
replace migranttype=2 if immigrant==1
label define Lmigrant 0 "Native" 1 "Refugee" 2 "Other immigrant"
label values migranttype Lmigrant

* Labor market outcomes
* employed is left missing when ilostat is missing. A bare (ilostat==1) would
* evaluate to 0 for those records, silently counting them as not employed and
* making the later missing(employed) filters ineffective.
gen employed=(ilostat==1) if !missing(ilostat)
* Code 99 of incdecil is treated as "no information"
gen incomedecile=incdecil if incdecil!=99

* Education
* Map the string codes L/M/H of hatlev1d to 1/2/3; anything else stays missing
gen educ=.
replace educ=1 if hatlev1d=="L"
replace educ=2 if hatlev1d=="M"
replace educ=3 if hatlev1d=="H"
label define Leduc 1 "Low" 2 "Med" 3 "High"
label values educ Leduc

* Currently in education
* Primary source is mainstat (code 3 => student); where mainstat is missing,
* fall back on educstat (2 => not a student, 1 or 3 => student)
recode mainstat (3=1)(.=.)(else=0), gen(student)
replace student=0 if missing(mainstat)&educstat==2
replace student=1 if missing(mainstat)&(educstat==1|educstat==3)

* Sample of interest
* The upper bound also removes records with missing age_cat (missing sorts above 62)
keep if age_cat>=22&age_cat<=62
drop if missing(migranttype)
* Drops students and records whose student status could not be determined
keep if student==0

* Reformat and save
rename ahm2008_migreas migreas
* NOTE(review): the 2014 section defines a value label with this same name but a
* different coding (only code 5 "Asylum" and code 9 "N/A" coincide). Take care
* when reading migreas labels on the pooled data.
label define Lmigreas 1 "Employment, transfer" 2 "Employment, prior" 3 "Employment, not prior" 4 "Study" 5 "Asylum" 6 "Family, unification" 7 "Family, formation" 8 "Other" 9 "N/A"
label values migreas Lmigreas
keep hostcountry yearofinterview weight age_cat female yearssincearrive_cat countryoforigin migranttype employed incomedecile migreas educ student
save "temp2008", replace

********************************************************************************
*** Compute sample descriptives

*** 2014 sample only
use "temp2014", clear

* One pass per migrant type: 0 = native, 1 = refugee, 2 = other immigrant.
* Each pass works on a preserved copy, so the recodes and the collapse below
* never touch the data used by the next pass.
forval loopmig=0(1)2{
preserve
	disp `loopmig'
	keep if migranttype==`loopmig'

	* # Observations
	count

	* Gender
	sum female [aw=weight]
	* Age
	* The variable is called age_cat in the saved file; the full name is spelled
	* out so the command does not depend on variable-name abbreviation being on
	sum age_cat [aw=weight], detail

	* Time since arrival
	sum yearssincearrive_cat [aw=weight], detail

	* LM outcomes
	count if !missing(employed)
	sum employed [aw=weight], detail

	* Education
	* Missing education is recoded to -1 so that it appears as its own row
	replace educ=-1 if missing(educ)
	tab educ [aw=weight]

	* Language
	* Missing language is recoded to -1 so that it appears as its own row
	replace language=-1 if missing(language)
	tab language [aw=weight]
	* For the two indicators, missing is counted as 0, so the means are shares
	* of the whole group rather than of respondents with a language answer
	replace lang_atleastadv=0 if missing(lang_atleastadv)
	sum lang_atleastadv [aw=weight]
	replace lang_begatbest=0 if missing(lang_begatbest)
	sum lang_begatbest [aw=weight]
	
	* Country of origin
	* Weighted share of each origin country within the current migrant type,
	* listing the ten largest. Despite the variable names, this is computed for
	* whichever migrant type the loop is on, not only for refugees.
	capture gen dummy=1
	collapse (sum) numrefugees=dummy [iw=weight], by(countryoforigin)
	egen totalrefugees=sum(numrefugees)
	gen fracrefugees=numrefugees/totalrefugees
	gsort -fracrefugees
	gen thecounter=_n
	list countryoforigin fracrefugees if thecounter<=10
restore
}

* Pooled sample
* Stack the 2008 and 2014 extracts; variables that exist only in the 2014 file
* (the language measures) are missing for the 2008 records
use "temp2008", clear
append using "temp2014"

* One pass per migrant type: 0 = native, 1 = refugee, 2 = other immigrant.
* Each pass works on a preserved copy, so the recodes and the collapse below
* never touch the data used by the next pass.
forval loopmig=0(1)2{
preserve
	disp `loopmig'
	keep if migranttype==`loopmig'

	* # Observations
	count

	* Gender
	sum female [aw=weight]
	* Age
	* The variable is called age_cat in the saved files; the full name is spelled
	* out so the command does not depend on variable-name abbreviation being on
	sum age_cat [aw=weight], detail

	* Time since arrival
	sum yearssincearrive_cat [aw=weight], detail

	* LM outcomes
	count if !missing(employed)
	sum employed [aw=weight], detail

	* Education
	* Missing education is recoded to -1 so that it appears as its own row
	replace educ=-1 if missing(educ)
	tab educ [aw=weight]

	* Country of origin
	* Weighted share of each origin country within the current migrant type,
	* listing the ten largest. Despite the variable names, this is computed for
	* whichever migrant type the loop is on, not only for refugees.
	capture gen dummy=1
	collapse (sum) numrefugees=dummy [iw=weight], by(countryoforigin)
	egen totalrefugees=sum(numrefugees)
	gen fracrefugees=numrefugees/totalrefugees
	gsort -fracrefugees
	gen thecounter=_n
	list countryoforigin fracrefugees if thecounter<=10
restore
}

********************************************************************************
*** Generate unconditional plots

* Only the 2014 module enters these plots
use "temp2014", clear

* Discard natives, leaving refugees and other immigrants
drop if migranttype==0

* Plot subgroup by time since arrival:
*   1 = arrival categories 1-2, 2 = categories 3-4, 3 = category 5
* Anything else (category 0 or missing) gets no subgroup.
gen insample=cond(inlist(yearssincearrive_cat,1,2),1, ///
	cond(inlist(yearssincearrive_cat,3,4),2, ///
	cond(yearssincearrive_cat==5,3,.)))

*** Employment
* Minimum number of raw observations a country x migrant type x subgroup cell
* must exceed to be plotted (also used by the language plot further down)
local minsample="10"
preserve
	keep if insample==1|insample==2|insample==3
	drop if missing(employed)
	gen dummy=1
	* Per cell: weighted mean employment, raw observation count (samplesize) and
	* sum of weights (weightedsamplesize); rawsum ignores the analytic weights
	collapse (mean) employed (rawsum) samplesize=dummy weightedsamplesize=weight [aw=weight], by(hostcountry migranttype insample)
	* Merge in the limits which need to be censored for reliability
	* keep(master match) discards countries that appear only in the limits file.
	* Such rows would carry a missing samplesize, which Stata ranks above every
	* number, so they would pass the samplesize filter below and reach the
	* reshape with missing keys.
	rename hostcountry country
	merge m:1 country using "AdhocModules/datafileinfo/datafileinfo_ahm_2014", keepusing(mlimit_a mlimit_b) keep(master match) nogen
	rename country hostcountry
	* A cell whose country has no limit on file has mlimit_a missing, which makes
	* the comparison true, so it is treated as below the limit and dropped
	gen below_alimit=(1000*weightedsamplesize<mlimit_a)
	keep if samplesize>`minsample'&!below_alimit
	drop samplesize below_alimit mlimit* weightedsamplesize
	* Plot data
	* After the reshape, employed1 is the refugee rate and employed2 the rate for
	* other immigrants within the same host country and subgroup
	reshape wide employed, i(hostcountry insample) j(migranttype)
	twoway ///
		(scatter employed1 employed2 if insample==1) ///
		(scatter employed1 employed2 if insample==2) ///
		(scatter employed1 employed2 if insample==3) ///
		(function y = x, color(black) range(0 1)) ///
		, xtitle("Other immigrants", height(5)) ytitle("Refugees", height(5)) legend(order(1 "<=10 years" 2 "11-19 years" 3 ">=20 years"))
	graph save "employment_unconditional", replace
restore

*** Language
* Same construction as the employment plot, for the share with at least
* advanced host-country language skills. Relies on the local minsample that is
* set at the start of the employment plot section.
preserve
	keep if insample==1|insample==2|insample==3
	drop if missing(lang_atleastadv)
	gen dummy=1
	* Per cell: weighted mean of the indicator, raw observation count
	* (samplesize) and sum of weights (weightedsamplesize)
	collapse (mean) lang_atleastadv (rawsum) samplesize=dummy weightedsamplesize=weight [aw=weight], by(hostcountry migranttype insample)
	* Merge in the limits which need to be censored for reliability
	* keep(master match) discards countries that appear only in the limits file.
	* Such rows would carry a missing samplesize, which Stata ranks above every
	* number, so they would pass the samplesize filter below and reach the
	* reshape with missing keys.
	rename hostcountry country
	merge m:1 country using "AdhocModules/datafileinfo/datafileinfo_ahm_2014", keepusing(mlimit_a mlimit_b) keep(master match) nogen
	rename country hostcountry
	* A cell whose country has no limit on file has mlimit_a missing, which makes
	* the comparison true, so it is treated as below the limit and dropped
	gen below_alimit=(1000*weightedsamplesize<mlimit_a)
	keep if samplesize>`minsample'&!below_alimit
	drop samplesize below_alimit mlimit* weightedsamplesize
	* Plot data
	* After the reshape, suffix 1 is the refugee share and suffix 2 the share for
	* other immigrants within the same host country and subgroup
	reshape wide lang_atleastadv, i(hostcountry insample) j(migranttype)
	twoway ///
		(scatter lang_atleastadv1 lang_atleastadv2 if insample==1) ///
		(scatter lang_atleastadv1 lang_atleastadv2 if insample==2) ///
		(scatter lang_atleastadv1 lang_atleastadv2 if insample==3) ///
		(function y = x, color(black) range(0 1)) ///
		, xtitle("Other immigrants", height(5)) ytitle("Refugees", height(5)) legend(order(1 "<=10 years" 2 "11-19 years" 3 ">=20 years"))
	graph save "language_unconditional", replace
restore

********************************************************************************
*** Generate conditional plots

* Pool the 2008 and 2014 extracts
use "temp2008", clear
append using "temp2014"

* Setup data
* Shorter name for the time-since-arrival category used in the regressions
rename yearssincearrive_cat ysm
* Replace the string host-country identifier with a labelled numeric code so
* that it can enter the regressions as a factor variable
encode hostcountry, gen(hostcountrycodes)
// Note that UK=30 (the regressions use this code as the base category)
drop hostcountry
rename hostcountrycodes hostcountry
* Numeric code for the country of origin as well
encode countryoforigin, gen(origincountry)

* Matrix that collects the estimated employment gaps relative to natives
// As filled here it has one ROW per series and one COLUMN per time-since-arrival group:
//   row 1 = unconditional refugee, row 2 = unconditional other immigrant,
//   row 3 = conditional refugee,   row 4 = conditional other immigrant
//   columns 1-5 = ysm groups 1-5
// (it is transposed later, before plotting)
matrix empgaps=J(4,5,.)

* Conditional gaps: control for age category, gender and education in addition
* to host country x survey year effects.
* The gap for a migrant type in a given ysm group is the sum of the migrant-type
* main effect, the ysm main effect and their interaction.
reg employed i.ysm##ib0.migranttype ib30.hostcountry##ib2014.yearofinterview i.age_cat i.female i.educ [pw=weight], vce(robust)
forvalues mig = 1/2 {
	forvalues grp = 1/5 {
		matrix empgaps[`mig'+2,`grp']=_b[`mig'.migranttype]+_b[`grp'.ysm]+_b[`grp'.ysm#`mig'.migranttype]
	}
}

* Unconditional gaps: same construction, conditioning only on host country x
* survey year
reg employed i.ysm##ib0.migranttype ib30.hostcountry##ib2014.yearofinterview [pw=weight], vce(robust)
forvalues mig = 1/2 {
	forvalues grp = 1/5 {
		matrix empgaps[`mig',`grp']=_b[`mig'.migranttype]+_b[`grp'.ysm]+_b[`grp'.ysm#`mig'.migranttype]
	}
}

* Plot these results
* The data in memory are set aside and replaced by a five-row dataset built
* from the gap matrix; they come back on restore. The matrix itself stays
* transposed afterwards.
preserve
	clear
	* Transpose so that each series is a column and therefore becomes one
	* variable (empgaps1-empgaps4) when the matrix is turned into data
	matrix empgaps=empgaps'
	svmat empgaps
	* Row k of the transposed matrix is time-since-arrival group k
	gen ysm=_n
	capture label define Lysm 0 "N/A" 1 "1-4 years" 2 "5-10 years" 3 "11-14 years" 4 "15-19 years" 5 ">19 years"
	label values ysm Lysm
	* Build the four connected-line plots, one per series
	local series
	forvalues k = 1/4 {
		local series `series' (connected empgaps`k' ysm, sort)
	}
	twoway `series' ///
		, yline(0, lcolor(black)) ///
		legend(order(1 "Refugees, naive" 2 "Other immigrants, naive" 3 "Refugees, conditional" 4 "Other immigrants, conditional")) ///
		xtitle("Years since migration") ytitle("Employment gap to natives") xlabel(1 2 3 4 5,valuelabel)
	graph save "employment_conditional", replace
restore

********************************************************************************
*** Clean up

* Delete the two intermediate extracts. With -capture noisily- a failed erase
* still prints its error message but does not stop the script.
foreach yr in 2008 2014 {
	capture noisily erase "temp`yr'.dta"
}
clear all
